#In this project, we build a image classifier to classify phenotypes corresponding to MSL2 ‘territories’,‘speckles’ and ‘intermediates’ across 12 different genotypes.
Test image classifier
Visualize a subset of nuclei from each class using EBI package. Default readTIFF option returns scaled 0-1 grayscale values, as.is=T returns raw values - raw values appear to be 16 bit (max value is 65280 .ie. 2^16 -256)
Note: Verify which channel signal will be used for classification. For this set, GFP (channel 2) is the appropriate channel
## Extract categories -
Train_categories <- unique(gsub("_.*","",gsub("Images_Train/","",list_files)))
## Display 6 nuclei from each cateogory
image_raw <- lapply(list_files, function(i) readTIFF(i,as.is = T)[,,2])
i <- 1
j <- 1
set.seed(12554)
par(mfrow=c(2,3)) ## adjust margins based on number of images
for(i in 1:length(Train_categories)){
my_category_images <- list_files[grep(Train_categories[i],list_files)]
my_category_images_sub <- my_category_images[sample.int(length(my_category_images),6)]
for (j in 1:6){
image(readTIFF(my_category_images_sub[j],as.is=T)[,,2],col=gray.colors(256), main=paste(Train_categories[i],"_",j),asp=1)
axis(1,0:1,0:1)
}
}
Now extract Gray-Level Co-occurrence Matrix (GLCM) features for whole image. Also segment and calculate object level features. Representative image, histogram of signal distribution and segmented image for each of Intermediate, Speckle and Territory classes are displayed.
res_method <- as.data.frame(matrix(nrow=length(list_files),ncol=46))
#Test case images
i <- 3 #Intermediate
i <- 167 #speckles
i <- 374 #territory
for (i in 1:length(list_files)){
y <- readImage(list_files[i])[,,2]
imagename <- gsub("_.*","",gsub(".*/","",list_files[i]))
#### for visualization purposes in test cases -- Histogram lets you define background intensity for object detection
if (i %in% c(3,167,374)){
EBImage::display(y)
hist(as.vector(imageData(y)),main=imagename)
}
#### Compute Haralick features for entire image
x <- Image(1,dim=dim(y))
res_method[i,1:26] <- colMeans(computeFeatures.haralick(x, y))
### Add mean intensity of image
res_method[i,27] <- mean(imageData(y))
### Detect objects by thresholding on gaussian blurred images and compute additional object specific features
y_2 <- gblur(y,sigma=0.5) ### mild blurring - sigma must be selected appropriately !
#EBImage::display(y_2,all=T)
cutoff <- 0.3 ## background cutoff selected based on histogram
x <- y_2 > cutoff
if (i %in% c(3,167,374)){
EBImage::display(x,all=T)
}
#if no objects detected, then object feature is 0
if(sum(imageData(x))==0){
res_method[i,28:46] <- 0
} else {
res_method[i,28:46] <- c(computeFeatures.moment(x,y), computeFeatures.shape(x), computeFeatures.basic(x,y))
}
rownames(res_method)[i] <- gsub("_RGB.tif","",gsub("Images_Train/","",list_files[i]))
}
Each row of output dataframe represents an image, each column is the image-averaged extracted feature. Outcome variable is the class label
head(res_method)
## V1 V2 V3
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.08855262 1.777328 0.7577260
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.05843461 2.642324 0.7717139
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.14894853 2.173823 0.8104294
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.14921056 1.867110 0.7574788
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.07663609 1.687390 0.7737413
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.06976166 2.359195 0.8030911
## V4 V5 V6
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 4.668012 0.6750258 5.387328
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 6.787309 0.5831728 8.102039
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 6.733544 0.7175006 5.283571
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 4.849375 0.7164898 4.487800
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 4.728897 0.6424870 6.082627
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 6.990576 0.6329554 6.168019
## V7 V8 V9
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 32.33825 0.9778461 1.278933
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 69.53804 1.0998163 1.517621
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 39.28764 0.9792376 1.246613
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 26.47547 0.8898718 1.147063
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 38.83597 1.0222149 1.359136
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 47.53571 1.0756128 1.424631
## V10 V11 V12
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 1.777328 0.5092955 0.2278099
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 2.642324 0.5878146 0.1938215
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 2.173823 0.5073276 0.3366114
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 1.867110 0.4909483 0.2436463
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 1.687390 0.5180242 0.2185738
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 2.359195 0.5573914 0.2567113
## V13 V14 V15
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.5293558 0.07772315 2.879286
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.5272450 0.06060207 3.999657
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.6294582 0.12817032 3.731799
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.5221130 0.13038258 2.475062
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.5325514 0.06678650 2.321121
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.5853827 0.06199452 4.193571
## V16 V17 V18
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.6243313 4.832214 0.6225742
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.6542558 6.784126 0.5557300
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.6896816 7.012856 0.6732657
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.6500609 4.536417 0.6770442
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.6974503 4.835933 0.5979742
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.6944256 7.861785 0.5935076
## V19 V20 V21
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 5.520714 33.02087 0.9851549
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 8.178228 69.46976 1.0836587
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 5.375343 39.51140 0.9945011
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 4.555111 24.98735 0.9059002
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 6.254002 40.39662 1.0219847
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 6.358571 51.05604 1.0857665
## V22 V23 V24
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 1.329097 2.879286 0.5779118
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 1.522363 3.999657 0.6329055
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 1.299336 3.731799 0.5758876
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 1.197645 2.475062 0.5448614
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 1.410643 2.321121 0.5676343
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 1.477456 4.193571 0.6304734
## V25 V26
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.1766458 0.4764845
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.1658622 0.4905949
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.2755093 0.5829002
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.1861049 0.4667834
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.1698007 0.4798931
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.2063121 0.5367891
## V27 V28 V29
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.07502884 34.77017 34.75766
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.11667160 45.90038 21.23337
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.07237144 31.37827 16.20754
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.06055882 26.75443 37.59454
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.08585621 26.39859 35.02931
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.08656737 23.10168 15.28314
## V30 V31
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 17.04385 0.8851290
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 26.12551 0.8855931
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 17.64074 0.5695780
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 25.04194 0.9088548
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 30.95548 0.8838904
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 20.75218 0.7579586
## V32 V33 V34 V35
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 -0.23467400 41 7 1.009265
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 -0.06850603 107 43 4.220496
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 -0.33112465 62 32 3.765024
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 -0.36565479 36 2 0.500000
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 -0.59981352 37 1 0.000000
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 -0.15170294 93 49 4.673231
## V36 V37 V38
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.5361964 0.5150788 1.921946
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 1.6349564 0.5929092 7.063032
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 1.5638702 0.2576941 6.200113
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.0000000 0.5000000 0.500000
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.0000000 0.0000000 0.000000
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 1.6108684 0.3421032 7.224865
## V39 V40
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.3899570 0.08509916
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.3995235 0.08038216
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.4208729 0.09880836
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.3992375 0.09724866
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.3616322 0.05569866
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.3889943 0.08320161
## V41 V42
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.06976941 0.2792157
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.09302588 0.2823529
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.09302588 0.2823529
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.05814118 0.2509804
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.04651294 0.2823529
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.06976941 0.2980392
## V43 V44 V45
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.2980392 0.3607843 0.5843137
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.2980392 0.3921569 0.5333333
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.3137255 0.4078431 0.5990196
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.2980392 0.3764706 0.5490196
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.3074510 0.3450980 0.4705882
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.2980392 0.3764706 0.5647059
## V46
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.6149020
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19 0.5951373
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8 0.7188627
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5 0.7209804
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.5433725
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18 0.6654118
Test visualization the features for individual classes
## Heatmap
hm <- ComplexHeatmap::Heatmap((scale(res_method)),show_row_names = F, width=10)
labels <- as.data.frame(res_method) %>% rownames_to_column("Phenotype") %>% mutate(Phenotype=case_when(grepl("Territory",Phenotype)~"Territory",
grepl("Speckles",Phenotype)~"Speckles",
grepl("Intermediate",Phenotype)~"Intermediate")) %>%dplyr::select(Phenotype)
lab_hm <- Heatmap(labels,row_order = row_order(hm), width=1,col = c("Grey","Red","Black"))
## Warning: The input is a data frame, convert it to a matrix.
## Warning: Note: not all columns in the data frame are numeric. The data frame
## will be converted into a character matrix.
## Warning: The heatmap has not been initialized. You might have different results
## if you repeatedly execute this function, e.g. when row_km/column_km was
## set. It is more suggested to do as `ht = draw(ht); row_order(ht)`.
hm_full <- hm + lab_hm
draw(hm_full)
## PCA
pca_res <- prcomp(res_method,scale. = T)
ggplot2::autoplot(pca_res,data=cbind(res_method,labels),colour="Phenotype")
## Umap
umap_res <- umap(res_method)
umap_df <- cbind(as.data.frame(umap_res$layout),labels)
umap_df %>% ggplot(aes(x=V1,y=V2,col=Phenotype)) + geom_point()
A clustered heatmap : Shows that speckles and territories separate quite well but ‘Intermediates’ are distributed everywhere -> Linear separation methods wont that well
PCA : Shows that intermediate points are scattered in between well-separated speckles and territories
UMAP : Looks funky, need to verify if highly correlated variables can cause issues with UMAP
Now test different ML approaches for classification. We will compare Logistic Regression with Elastic Nets, SVM and Boosted Decision Trees.
First, we define our train-test splits (80-20), cross validation scheme (10-fold CV) and get a sense of how correlated the predictors are so we can decide on appropriate Feature selection approaches.
set.seed(125)
res_fin <- cbind(res_method,Phenotype=as.factor(labels[,"Phenotype"]))
saveRDS(res_fin,file="241009_GLCMmetrics.rds")
#### Define train and test data splits - Use 80-20 split.
res_split <- initial_split(res_fin,prop=0.8,strata=Phenotype)
res_training <- res_split %>% training()
res_testing <- res_split %>% testing()
res_folds <- vfold_cv(res_training,v=10,strata=Phenotype)
#### Are there any highly correlated variables ?
res_fin %>% select_if(is.numeric) %>% corrr::correlate() %>% shave() %>% rplot(print_cor = T) + theme(axis.text.x = element_text(angle=90,hjust = 1))
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
Confirming what we previously saw in the heatmap, many Haralick texture features are highly correlated. We can use appropriate feature selection method depending on ML algorithm.
Now we remove variables highly correlated to other predictors (threshold of r=0.9) as well as remove Near-zero variance predictors. We don’t have to be very stringent as Regularization will shrink/minimize/eliminate the influence of correlated variables.
Note 1 : scaling/normalization is not important for simple regression algorithms. However, it is more important for distance based algorithms (SVM etc) or magnitude dependent penalty algorithms (Lasso for instance)
Note 2: Scale sets standard dev to 1, normalize sets stdev to 1 and mean to 0 -> Pick depending on application ! Non monotonic transformations like normalization can affect outcomes !
## Recipe for preprocess
filt_recipe_glm <- recipe(Phenotype~.,data=res_fin) %>%
step_normalize(all_numeric_predictors()) %>%
step_nzv(all_predictors()) %>%
step_corr(all_numeric(),threshold=0.9)
## train recipe
filt_prep_glm <- filt_recipe_glm %>% prep(training=res_training)
## removes 29 highly correlated variables ! (note that if two vars are correlated to each other, only one is removed)
tidy(filt_prep_glm,number=3)
## # A tibble: 29 × 2
## terms id
## <chr> <chr>
## 1 V7 corr_gPMNQ
## 2 V9 corr_gPMNQ
## 3 V11 corr_gPMNQ
## 4 V13 corr_gPMNQ
## 5 V19 corr_gPMNQ
## 6 V20 corr_gPMNQ
## 7 V21 corr_gPMNQ
## 8 V22 corr_gPMNQ
## 9 V24 corr_gPMNQ
## 10 V26 corr_gPMNQ
## # ℹ 19 more rows
### Bake recipe to obtain clean training data - Optional to visualize transformed training data
filt_bake_glm <- filt_prep_glm %>% bake(new_data=NULL)
Our preprocessing removes 29 variables !
Now we build the classifier. Optimal parameters for penalty and mixture(between lasso and ridge) are found through grid search
#### Define model
res_model_glmnet <- multinom_reg(penalty = tune(),mixture=tune()) %>% set_mode("classification") %>% set_engine("glmnet")
res_model_glmnet_workflow <- workflow() %>% add_recipe(filt_recipe_glm) %>% add_model(res_model_glmnet)
penalty_grid <- grid_regular(penalty(range=c(-3,-1)),mixture(range=c(0,1)),levels=c(mixture=5,penalty=20)) ## 100 models !
glmnet_grid <- tune_grid(res_model_glmnet_workflow,resamples = res_folds,grid=penalty_grid)
autoplot(glmnet_grid,metric="roc_auc")
Looks like regularization penalty close to 0 works best, while mixture (Prop of Lasso Penalty) is optimal at 1 .ie. mixture=1 meaning ridge regression.
Now we select best model based on ROC_AUC and collect metrics
best_vals <- glmnet_grid %>% select_best(metric="accuracy") ## select best based on roc_auc or accuracy - result is similar !
### finalize model
res_model_metrics <- finalize_workflow(res_model_glmnet_workflow,best_vals) %>% fit_resamples(resamples=res_folds) %>% collect_metrics()
print(res_model_metrics)
## # A tibble: 3 × 6
## .metric .estimator mean n std_err .config
## <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 accuracy multiclass 0.777 10 0.0204 Preprocessor1_Model1
## 2 brier_class multiclass 0.168 10 0.00843 Preprocessor1_Model1
## 3 roc_auc hand_till 0.908 10 0.0110 Preprocessor1_Model1
Resampled roc_auc is ~91% (ratio of true positives to false positives), accuracy is fairly low with 78%. Note that in previous tests we had achieved higher accuracies with better quality (.ie. higher resolution) images - Also verify training labels again !
Apply model on whole dataset to get final metrics. Output df_res object contains original class labels, predicted class labels as well as class specific probabilities. (Note that row order corresponds to same rows defined in the res_fin object containing file names)
res_model_glmnet_final <- finalize_workflow(res_model_glmnet_workflow,best_vals) %>% last_fit(res_split) ### final fit to train-test split
res_model_glmnet_final %>% collect_metrics()
## # A tibble: 3 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy multiclass 0.815 Preprocessor1_Model1
## 2 roc_auc hand_till 0.915 Preprocessor1_Model1
## 3 brier_class multiclass 0.164 Preprocessor1_Model1
## res_model_final %>% extract_workflow() ### This retrieves the model object for future use !
df_res <-data.frame(res_model_glmnet_final %>% extract_workflow() %>% predict(res_fin),res_model_glmnet_final %>% extract_workflow() %>% predict(res_fin,type="prob"),res_fin$Phenotype)
print(df_res)
## .pred_class .pred_Intermediate .pred_Speckles .pred_Territory
## 1 Intermediate 6.256659e-01 2.975573e-01 7.677671e-02
## 2 Intermediate 7.590915e-01 9.865113e-02 1.422574e-01
## 3 Intermediate 6.511714e-01 3.034147e-01 4.541389e-02
## 4 Intermediate 6.356624e-01 1.165913e-01 2.477463e-01
## 5 Intermediate 5.355880e-01 5.529285e-02 4.091191e-01
## 6 Intermediate 6.696319e-01 1.022333e-01 2.281348e-01
## 7 Intermediate 5.777025e-01 1.315914e-01 2.907061e-01
## 8 Intermediate 7.749291e-01 1.180196e-01 1.070513e-01
## 9 Intermediate 5.180655e-01 3.596811e-01 1.222534e-01
## 10 Intermediate 5.317792e-01 3.979316e-01 7.028914e-02
## 11 Intermediate 6.813782e-01 1.957112e-01 1.229106e-01
## 12 Intermediate 4.995264e-01 4.537030e-01 4.677061e-02
## 13 Intermediate 5.570694e-01 1.281301e-01 3.148005e-01
## 14 Intermediate 7.023743e-01 1.416337e-01 1.559921e-01
## 15 Territory 1.998480e-01 3.920204e-03 7.962318e-01
## 16 Intermediate 6.550931e-01 6.168515e-02 2.832218e-01
## 17 Intermediate 6.350683e-01 2.461447e-01 1.187871e-01
## 18 Speckles 1.411484e-01 8.573432e-01 1.508417e-03
## 19 Speckles 1.070218e-01 8.924559e-01 5.222768e-04
## 20 Territory 1.827601e-01 1.133181e-03 8.161067e-01
## 21 Territory 4.481823e-01 1.941812e-02 5.323995e-01
## 22 Intermediate 7.218091e-01 7.276706e-02 2.054238e-01
## 23 Intermediate 7.385910e-01 7.677511e-03 2.537315e-01
## 24 Speckles 4.529223e-01 5.256692e-01 2.140854e-02
## 25 Intermediate 5.497300e-01 4.209291e-01 2.934098e-02
## 26 Speckles 3.875296e-01 4.528388e-01 1.596316e-01
## 27 Intermediate 5.710534e-01 3.175089e-01 1.114377e-01
## 28 Intermediate 5.328298e-01 9.322264e-02 3.739476e-01
## 29 Intermediate 5.873464e-01 3.819178e-01 3.073582e-02
## 30 Intermediate 5.193787e-01 4.502562e-01 3.036507e-02
## 31 Intermediate 6.771456e-01 4.519962e-02 2.776548e-01
## 32 Intermediate 6.924128e-01 1.247991e-01 1.827882e-01
## 33 Intermediate 6.702736e-01 3.078211e-01 2.190526e-02
## 34 Intermediate 6.341335e-01 2.416717e-02 3.416994e-01
## 35 Intermediate 5.512392e-01 2.804028e-03 4.459568e-01
## 36 Speckles 3.551550e-02 9.644157e-01 6.880387e-05
## 37 Territory 3.695187e-01 1.308774e-02 6.173936e-01
## 38 Intermediate 7.705033e-01 1.137950e-01 1.157017e-01
## 39 Intermediate 8.575302e-01 7.641109e-02 6.605872e-02
## 40 Speckles 4.321159e-01 5.530248e-01 1.485931e-02
## 41 Intermediate 5.418591e-01 5.538020e-02 4.027607e-01
## 42 Territory 4.732694e-01 4.467940e-02 4.820512e-01
## 43 Speckles 2.917847e-01 6.742492e-01 3.396616e-02
## 44 Intermediate 5.274035e-01 4.366180e-01 3.597851e-02
## 45 Intermediate 5.754522e-01 3.740979e-01 5.044990e-02
## 46 Intermediate 6.173031e-01 1.390299e-01 2.436670e-01
## 47 Intermediate 4.972821e-01 5.536371e-02 4.473542e-01
## 48 Intermediate 6.883942e-01 2.387093e-01 7.289658e-02
## 49 Intermediate 5.263523e-01 3.812101e-01 9.243761e-02
## 50 Intermediate 5.405966e-01 3.736254e-01 8.577805e-02
## 51 Intermediate 6.911764e-01 1.845428e-01 1.242808e-01
## 52 Intermediate 6.155366e-01 7.149645e-02 3.129669e-01
## 53 Intermediate 7.124565e-01 8.327972e-03 2.792155e-01
## 54 Intermediate 6.211602e-01 3.296203e-01 4.921958e-02
## 55 Intermediate 6.343192e-01 2.655479e-01 1.001329e-01
## 56 Territory 4.532037e-01 5.447584e-02 4.923205e-01
## 57 Territory 3.237415e-01 2.566082e-02 6.505976e-01
## 58 Intermediate 6.819858e-01 9.708059e-02 2.209336e-01
## 59 Intermediate 7.605391e-01 1.506759e-02 2.243933e-01
## 60 Intermediate 4.868609e-01 2.715210e-01 2.416181e-01
## 61 Speckles 3.637066e-01 6.154911e-01 2.080230e-02
## 62 Intermediate 7.421402e-01 7.420915e-02 1.836507e-01
## 63 Intermediate 6.723360e-01 2.426605e-01 8.500355e-02
## 64 Intermediate 6.453945e-01 4.621845e-02 3.083871e-01
## 65 Intermediate 7.395603e-01 5.901486e-02 2.014248e-01
## 66 Intermediate 7.972195e-01 7.291827e-02 1.298622e-01
## 67 Intermediate 7.864926e-01 1.600472e-01 5.346017e-02
## 68 Intermediate 5.434465e-01 4.315161e-01 2.503738e-02
## 69 Intermediate 7.176793e-01 1.047724e-01 1.775483e-01
## 70 Intermediate 5.224806e-01 4.468186e-01 3.070074e-02
## 71 Territory 4.218110e-01 2.564620e-02 5.525428e-01
## 72 Intermediate 6.003483e-01 2.970959e-01 1.025557e-01
## 73 Territory 6.818259e-02 1.123977e-03 9.306934e-01
## 74 Intermediate 6.545468e-01 3.102911e-01 3.516211e-02
## 75 Intermediate 5.742594e-01 3.767826e-01 4.895805e-02
## 76 Territory 3.699739e-01 1.015013e-02 6.198760e-01
## 77 Intermediate 6.772891e-01 1.130829e-01 2.096280e-01
## 78 Intermediate 5.826202e-01 3.584594e-01 5.892036e-02
## 79 Intermediate 6.578401e-01 2.105794e-02 3.211020e-01
## 80 Territory 4.265757e-01 4.056551e-02 5.328587e-01
## 81 Intermediate 6.943420e-01 5.969921e-02 2.459588e-01
## 82 Intermediate 7.276358e-01 1.030036e-01 1.693605e-01
## 83 Intermediate 7.168753e-01 2.081137e-01 7.501098e-02
## 84 Intermediate 6.198025e-01 2.762601e-02 3.525715e-01
## 85 Territory 3.156881e-01 1.095983e-02 6.733521e-01
## 86 Intermediate 5.482599e-01 3.786004e-03 4.479541e-01
## 87 Territory 1.766833e-01 4.413278e-03 8.189035e-01
## 88 Intermediate 6.713640e-01 2.196814e-01 1.089546e-01
## 89 Intermediate 6.298294e-01 3.431362e-01 2.703433e-02
## 90 Intermediate 5.587627e-01 3.982602e-01 4.297708e-02
## 91 Intermediate 6.117728e-01 3.040680e-01 8.415918e-02
## 92 Intermediate 5.499502e-01 4.229064e-01 2.714333e-02
## 93 Speckles 5.656171e-02 9.430619e-01 3.763890e-04
## 94 Intermediate 7.818066e-01 6.658385e-02 1.516095e-01
## 95 Intermediate 6.424960e-01 3.068771e-01 5.062685e-02
## 96 Speckles 3.344485e-01 6.591334e-01 6.418124e-03
## 97 Speckles 1.208186e-01 8.783069e-01 8.745414e-04
## 98 Intermediate 4.970360e-01 4.612656e-01 4.169842e-02
## 99 Intermediate 7.251684e-01 1.000598e-01 1.747718e-01
## 100 Intermediate 6.467405e-01 2.963889e-01 5.687058e-02
## 101 Intermediate 6.789210e-01 5.288529e-02 2.681937e-01
## 102 Intermediate 6.091792e-01 2.007885e-01 1.900323e-01
## 103 Intermediate 6.664968e-01 1.300542e-01 2.034490e-01
## 104 Intermediate 6.842653e-01 1.352953e-01 1.804394e-01
## 105 Intermediate 5.685806e-01 2.515834e-01 1.798360e-01
## 106 Intermediate 6.391027e-01 2.829634e-01 7.793397e-02
## 107 Speckles 3.579578e-01 6.345706e-01 7.471637e-03
## 108 Intermediate 6.031296e-01 2.841393e-01 1.127311e-01
## 109 Intermediate 6.069430e-01 2.591161e-01 1.339409e-01
## 110 Intermediate 6.447673e-01 3.140782e-01 4.115450e-02
## 111 Speckles 1.755315e-01 8.184620e-01 6.006512e-03
## 112 Intermediate 5.728918e-01 1.937297e-01 2.333785e-01
## 113 Intermediate 6.726895e-01 2.352585e-01 9.205201e-02
## 114 Intermediate 5.001732e-01 4.398125e-01 6.001429e-02
## 115 Intermediate 5.113200e-01 5.519639e-02 4.334836e-01
## 116 Speckles 4.194293e-01 5.716640e-01 8.906698e-03
## 117 Intermediate 5.916851e-01 1.859596e-02 3.897189e-01
## 118 Speckles 4.368797e-01 5.122741e-01 5.084625e-02
## 119 Intermediate 5.894448e-01 2.218266e-01 1.887287e-01
## 120 Territory 1.969399e-01 6.180842e-03 7.968792e-01
## 121 Intermediate 6.909431e-01 1.504258e-01 1.586311e-01
## 122 Speckles 4.201716e-01 5.680827e-01 1.174566e-02
## 123 Intermediate 5.084905e-01 1.113565e-02 4.803739e-01
## 124 Speckles 4.098068e-01 5.868241e-01 3.369116e-03
## 125 Speckles 2.859636e-01 6.763578e-01 3.767864e-02
## 126 Intermediate 7.161693e-01 2.101324e-01 7.369830e-02
## 127 Speckles 3.361727e-01 6.522572e-01 1.157012e-02
## 128 Intermediate 6.713185e-01 2.562570e-01 7.242456e-02
## 129 Intermediate 6.941632e-01 2.347279e-01 7.110887e-02
## 130 Intermediate 6.039668e-01 2.944807e-01 1.015525e-01
## 131 Intermediate 6.532936e-01 2.209789e-02 3.246085e-01
## 132 Intermediate 5.478175e-01 6.003623e-02 3.921462e-01
## 133 Intermediate 5.353589e-01 3.961639e-01 6.847712e-02
## 134 Intermediate 6.479606e-01 3.127039e-01 3.933546e-02
## 135 Speckles 1.234695e-01 8.752859e-01 1.244633e-03
## 136 Speckles 3.367825e-03 9.966217e-01 1.045719e-05
## 137 Intermediate 4.921447e-01 4.869942e-01 2.086111e-02
## 138 Speckles 3.035066e-02 9.695363e-01 1.130823e-04
## 139 Speckles 3.344330e-01 6.573623e-01 8.204678e-03
## 140 Speckles 4.308172e-01 4.784987e-01 9.068417e-02
## 141 Speckles 4.202226e-01 4.810503e-01 9.872710e-02
## 142 Speckles 1.837860e-01 7.909559e-01 2.525810e-02
## 143 Speckles 8.712075e-02 9.120692e-01 8.100512e-04
## 144 Speckles 1.458092e-01 8.492559e-01 4.934954e-03
## 145 Speckles 2.229897e-03 9.977681e-01 2.036905e-06
## 146 Speckles 1.830110e-01 8.154462e-01 1.542848e-03
## 147 Speckles 1.223805e-01 8.764076e-01 1.211882e-03
## 148 Speckles 4.105297e-03 9.958914e-01 3.323450e-06
## 149 Speckles 8.191035e-04 9.991806e-01 2.479646e-07
## 150 Speckles 8.271193e-04 9.991725e-01 3.749410e-07
## 151 Speckles 1.301868e-03 9.986967e-01 1.453438e-06
## 152 Speckles 2.207470e-02 9.779105e-01 1.478758e-05
## 153 Speckles 3.791907e-01 6.128513e-01 7.958085e-03
## 154 Intermediate 6.182298e-01 3.555585e-01 2.621168e-02
## 155 Speckles 1.582306e-03 9.984062e-01 1.151258e-05
## 156 Speckles 1.462885e-01 8.526695e-01 1.041988e-03
## 157 Speckles 3.543527e-02 9.643999e-01 1.648567e-04
## 158 Speckles 7.237089e-02 9.226109e-01 5.018251e-03
## 159 Speckles 2.560881e-01 7.344623e-01 9.449580e-03
## 160 Intermediate 4.998961e-01 4.743073e-01 2.579657e-02
## 161 Speckles 1.036325e-02 9.894490e-01 1.877321e-04
## 162 Speckles 4.698160e-01 5.186834e-01 1.150066e-02
## 163 Speckles 3.358819e-02 9.655096e-01 9.022399e-04
## 164 Intermediate 5.911156e-01 3.654192e-01 4.346514e-02
## 165 Intermediate 7.395561e-01 1.781658e-01 8.227807e-02
## 166 Speckles 2.870576e-01 6.883752e-01 2.456719e-02
## 167 Speckles 4.464013e-02 9.552624e-01 9.742733e-05
## 168 Speckles 4.273092e-01 4.953364e-01 7.735436e-02
## 169 Speckles 2.884566e-01 6.714593e-01 4.008411e-02
## 170 Speckles 2.766211e-01 7.180332e-01 5.345713e-03
## 171 Speckles 3.582496e-02 9.639566e-01 2.184100e-04
## 172 Intermediate 5.504647e-01 2.926605e-01 1.568748e-01
## 173 Speckles 3.258318e-01 6.506697e-01 2.349850e-02
## 174 Speckles 1.157486e-01 8.805367e-01 3.714772e-03
## 175 Speckles 6.559558e-02 9.337178e-01 6.866571e-04
## 176 Speckles 7.001061e-02 9.293980e-01 5.913612e-04
## 177 Speckles 3.131387e-01 6.643021e-01 2.255919e-02
## 178 Speckles 9.883036e-02 9.005497e-01 6.199042e-04
## 179 Speckles 2.517304e-02 9.746898e-01 1.371250e-04
## 180 Intermediate 5.346863e-01 4.420694e-01 2.324429e-02
## 181 Territory 8.912594e-02 9.797052e-04 9.098944e-01
## 182 Speckles 2.013921e-03 9.979843e-01 1.786845e-06
## 183 Intermediate 5.369526e-01 3.987907e-01 6.425671e-02
## 184 Speckles 3.169487e-03 9.968300e-01 5.626013e-07
## 185 Speckles 5.119664e-02 9.484908e-01 3.125301e-04
## 186 Speckles 1.564393e-01 8.410045e-01 2.556233e-03
## 187 Speckles 3.828014e-01 6.104237e-01 6.774965e-03
## 188 Speckles 1.055445e-01 8.933488e-01 1.106714e-03
## 189 Intermediate 5.288059e-01 4.031657e-01 6.802836e-02
## 190 Intermediate 5.288059e-01 4.031657e-01 6.802836e-02
## 191 Territory 3.390955e-01 3.241300e-02 6.284915e-01
## 192 Speckles 9.697582e-02 9.026780e-01 3.462144e-04
## 193 Speckles 2.989867e-01 6.949248e-01 6.088547e-03
## 194 Speckles 1.299098e-01 8.661599e-01 3.930282e-03
## 195 Speckles 1.309196e-01 8.683564e-01 7.240282e-04
## 196 Speckles 3.032248e-02 9.692477e-01 4.298336e-04
## 197 Speckles 5.586467e-02 9.439248e-01 2.104821e-04
## 198 Speckles 2.534110e-01 7.456360e-01 9.529672e-04
## 199 Speckles 1.401415e-01 8.590129e-01 8.456267e-04
## 200 Intermediate 5.184270e-01 3.687320e-01 1.128411e-01
## 201 Speckles 1.285570e-01 8.690806e-01 2.362372e-03
## 202 Speckles 8.527764e-02 9.144738e-01 2.486033e-04
## 203 Intermediate 6.097949e-01 3.582371e-01 3.196800e-02
## 204 Intermediate 5.413062e-01 4.432842e-01 1.540965e-02
## 205 Speckles 2.949064e-01 7.015406e-01 3.552988e-03
## 206 Intermediate 6.258915e-01 1.860147e-01 1.880938e-01
## 207 Speckles 3.551111e-01 6.398983e-01 4.990534e-03
## 208 Speckles 1.753861e-01 8.231163e-01 1.497551e-03
## 209 Speckles 2.753381e-01 7.217321e-01 2.929832e-03
## 210 Speckles 2.822490e-01 7.121233e-01 5.627694e-03
## 211 Speckles 1.709056e-01 8.246265e-01 4.467837e-03
## 212 Speckles 2.504502e-01 7.441132e-01 5.436593e-03
## 213 Speckles 3.851946e-02 9.607451e-01 7.354196e-04
## 214 Speckles 1.103546e-01 8.878312e-01 1.814113e-03
## 215 Territory 1.538098e-01 2.305552e-03 8.438847e-01
## 216 Speckles 1.881062e-02 9.811731e-01 1.631209e-05
## 217 Speckles 4.362702e-01 5.111574e-01 5.257242e-02
## 218 Speckles 3.956759e-02 9.581775e-01 2.254946e-03
## 219 Speckles 2.160279e-01 7.759509e-01 8.021188e-03
## 220 Speckles 4.358824e-01 5.122267e-01 5.189093e-02
## 221 Speckles 9.152495e-02 9.082953e-01 1.797209e-04
## 222 Intermediate 5.440199e-01 4.438644e-01 1.211570e-02
## 223 Speckles 1.456237e-01 8.525751e-01 1.801224e-03
## 224 Speckles 9.447344e-02 9.050735e-01 4.530125e-04
## 225 Speckles 1.154205e-01 8.840947e-01 4.848621e-04
## 226 Speckles 4.048941e-01 5.773620e-01 1.774384e-02
## 227 Speckles 1.757120e-01 8.231793e-01 1.108611e-03
## 228 Speckles 2.858429e-01 7.117909e-01 2.366220e-03
## 229 Speckles 2.990758e-01 6.939430e-01 6.981184e-03
## 230 Speckles 3.102953e-01 6.741639e-01 1.554079e-02
## 231 Intermediate 5.875605e-01 3.662715e-01 4.616798e-02
## 232 Speckles 3.056549e-01 6.902111e-01 4.133926e-03
## 233 Speckles 3.921911e-03 9.960737e-01 4.426321e-06
## 234 Speckles 4.847154e-01 4.907257e-01 2.455897e-02
## 235 Speckles 4.813827e-01 4.984196e-01 2.019770e-02
## 236 Speckles 2.309706e-02 9.767628e-01 1.401787e-04
## 237 Speckles 4.589277e-01 5.114724e-01 2.959992e-02
## 238 Speckles 9.732837e-02 9.013359e-01 1.335748e-03
## 239 Speckles 9.899831e-02 8.982084e-01 2.793267e-03
## 240 Speckles 3.420258e-01 6.521932e-01 5.780961e-03
## 241 Speckles 1.391045e-03 9.986089e-01 8.682728e-08
## 242 Speckles 2.239920e-02 9.774641e-01 1.366696e-04
## 243 Intermediate 6.010021e-01 3.482611e-01 5.073677e-02
## 244 Speckles 2.482515e-03 9.975168e-01 7.123662e-07
## 245 Speckles 2.482968e-01 7.487955e-01 2.907647e-03
## 246 Speckles 3.083318e-01 6.733232e-01 1.834506e-02
## 247 Speckles 5.540015e-02 9.443630e-01 2.368845e-04
## 248 Speckles 3.904247e-04 9.996093e-01 2.806351e-07
## 249 Speckles 1.897711e-01 8.000899e-01 1.013897e-02
## 250 Speckles 1.323653e-01 8.628495e-01 4.785171e-03
## 251 Speckles 3.921769e-02 9.607370e-01 4.527356e-05
## 252 Speckles 2.625773e-01 7.337872e-01 3.635555e-03
## 253 Intermediate 5.851956e-01 3.853273e-01 2.947709e-02
## 254 Speckles 6.249596e-02 9.369970e-01 5.070391e-04
## 255 Speckles 1.469244e-01 8.524502e-01 6.253848e-04
## 256 Speckles 1.049407e-01 8.942172e-01 8.420096e-04
## 257 Speckles 3.849410e-02 9.612512e-01 2.546816e-04
## 258 Speckles 4.145067e-01 5.736963e-01 1.179706e-02
## 259 Speckles 8.748110e-02 9.119045e-01 6.144061e-04
## 260 Speckles 1.524191e-02 9.847411e-01 1.695814e-05
## 261 Speckles 3.719493e-01 6.105849e-01 1.746586e-02
## 262 Speckles 4.218295e-01 5.696674e-01 8.503113e-03
## 263 Speckles 2.037056e-01 7.802093e-01 1.608513e-02
## 264 Speckles 1.574402e-01 8.415312e-01 1.028610e-03
## 265 Speckles 2.028313e-02 9.789556e-01 7.612973e-04
## 266 Speckles 1.011956e-02 9.898499e-01 3.049414e-05
## 267 Speckles 1.845596e-01 8.119241e-01 3.516350e-03
## 268 Intermediate 6.901353e-01 2.181660e-01 9.169870e-02
## 269 Intermediate 5.443023e-01 4.423491e-01 1.334859e-02
## 270 Territory 1.501742e-03 2.449704e-07 9.984980e-01
## 271 Territory 1.448419e-02 1.032138e-04 9.854126e-01
## 272 Territory 1.238953e-01 6.367432e-04 8.754680e-01
## 273 Intermediate 4.729328e-01 5.976213e-02 4.673051e-01
## 274 Territory 9.751662e-03 1.417321e-05 9.902342e-01
## 275 Territory 3.371206e-01 2.234823e-02 6.405311e-01
## 276 Territory 8.721715e-03 5.683783e-06 9.912726e-01
## 277 Territory 1.153739e-02 1.691970e-05 9.884457e-01
## 278 Territory 1.273698e-01 3.813658e-04 8.722489e-01
## 279 Territory 4.011780e-02 3.234185e-05 9.598499e-01
## 280 Territory 1.017412e-01 5.914277e-04 8.976674e-01
## 281 Territory 4.722886e-01 9.122048e-03 5.185894e-01
## 282 Territory 9.898419e-03 3.020925e-05 9.900714e-01
## 283 Territory 3.222130e-01 2.817241e-02 6.496146e-01
## 284 Territory 1.341703e-02 7.954137e-06 9.865750e-01
## 285 Territory 2.995370e-01 4.670657e-03 6.957924e-01
## 286 Territory 9.098808e-03 4.499900e-06 9.908967e-01
## 287 Intermediate 4.793056e-01 1.095710e-01 4.111233e-01
## 288 Territory 1.814316e-03 4.878285e-07 9.981852e-01
## 289 Territory 2.501441e-02 2.595686e-04 9.747260e-01
## 290 Territory 4.110505e-02 5.565966e-04 9.583383e-01
## 291 Territory 2.204362e-01 1.032174e-04 7.794606e-01
## 292 Territory 3.667855e-01 1.924857e-02 6.139659e-01
## 293 Territory 1.016033e-01 9.594386e-04 8.974372e-01
## 294 Territory 8.393571e-02 1.864944e-03 9.141993e-01
## 295 Territory 3.265179e-01 3.804477e-03 6.696777e-01
## 296 Intermediate 6.786614e-01 1.609426e-01 1.603960e-01
## 297 Territory 3.273890e-01 1.269974e-02 6.599113e-01
## 298 Territory 4.100258e-01 1.701035e-02 5.729638e-01
## 299 Territory 3.250007e-01 5.800822e-03 6.691985e-01
## 300 Territory 4.877566e-02 3.185726e-04 9.509058e-01
## 301 Territory 2.868178e-01 1.303772e-02 7.001445e-01
## 302 Intermediate 6.691762e-01 1.930609e-01 1.377630e-01
## 303 Intermediate 4.884199e-01 4.673784e-02 4.648422e-01
## 304 Territory 1.292643e-01 2.083424e-03 8.686522e-01
## 305 Territory 3.387482e-01 1.909165e-02 6.421601e-01
## 306 Territory 2.990598e-01 2.822278e-02 6.727174e-01
## 307 Territory 1.406821e-01 2.153664e-03 8.571642e-01
## 308 Territory 1.949171e-01 5.124945e-03 7.999580e-01
## 309 Territory 5.847822e-02 7.320651e-04 9.407897e-01
## 310 Territory 5.446412e-02 1.988232e-04 9.453371e-01
## 311 Speckles 3.490926e-01 4.697305e-01 1.811769e-01
## 312 Territory 8.120024e-03 5.241428e-05 9.918276e-01
## 313 Territory 3.473655e-01 6.021764e-02 5.924169e-01
## 314 Intermediate 6.057083e-01 3.975479e-02 3.545369e-01
## 315 Territory 1.754903e-02 9.921960e-06 9.824410e-01
## 316 Intermediate 7.023601e-01 1.273595e-01 1.702804e-01
## 317 Territory 1.120328e-03 1.694204e-07 9.988795e-01
## 318 Territory 2.506630e-01 2.472987e-02 7.246071e-01
## 319 Territory 2.506630e-01 2.472987e-02 7.246071e-01
## 320 Territory 1.743859e-02 6.464477e-06 9.825549e-01
## 321 Intermediate 6.626297e-01 3.043195e-01 3.305086e-02
## 322 Territory 2.462949e-01 6.943344e-03 7.467618e-01
## 323 Territory 1.033479e-01 1.810874e-04 8.964710e-01
## 324 Territory 7.432827e-03 2.443880e-06 9.925647e-01
## 325 Territory 5.468742e-03 6.927964e-06 9.945243e-01
## 326 Territory 1.870479e-03 1.612096e-06 9.981279e-01
## 327 Territory 2.149680e-01 1.045647e-02 7.745755e-01
## 328 Territory 2.498004e-01 2.820259e-02 7.219970e-01
## 329 Territory 3.534906e-01 8.651075e-03 6.378583e-01
## 330 Territory 4.730851e-01 1.218593e-02 5.147290e-01
## 331 Territory 1.073492e-01 1.557798e-03 8.910930e-01
## 332 Territory 3.633873e-01 7.711373e-02 5.594990e-01
## 333 Speckles 3.505416e-01 5.897284e-01 5.973004e-02
## 334 Territory 3.969787e-02 1.810700e-05 9.602840e-01
## 335 Territory 5.471642e-02 5.625372e-05 9.452273e-01
## 336 Speckles 4.556942e-01 4.823569e-01 6.194891e-02
## 337 Territory 2.780721e-01 9.678722e-04 7.209600e-01
## 338 Territory 8.083709e-05 6.970526e-08 9.999191e-01
## 339 Territory 4.388036e-01 2.738948e-03 5.584574e-01
## 340 Territory 1.087290e-01 8.777645e-04 8.903933e-01
## 341 Territory 2.026008e-01 3.373575e-03 7.940257e-01
## 342 Territory 3.482829e-01 1.416439e-02 6.375527e-01
## 343 Territory 2.042594e-02 5.456068e-05 9.795195e-01
## 344 Intermediate 6.741879e-01 3.812493e-02 2.876872e-01
## 345 Territory 1.455538e-02 9.943945e-05 9.853452e-01
## 346 Territory 3.218840e-02 1.242100e-04 9.676874e-01
## 347 Intermediate 7.822000e-01 8.234183e-02 1.354582e-01
## 348 Territory 2.596336e-02 1.377486e-05 9.740229e-01
## 349 Territory 4.600680e-01 5.098815e-02 4.889439e-01
## 350 Territory 3.732789e-02 3.157657e-04 9.623563e-01
## 351 Territory 3.072642e-02 1.581747e-04 9.691154e-01
## 352 Territory 2.697628e-03 3.663014e-07 9.973020e-01
## 353 Intermediate 6.101766e-01 5.527792e-02 3.345455e-01
## 354 Territory 1.376335e-01 1.904116e-03 8.604624e-01
## 355 Territory 2.999766e-01 7.563428e-02 6.243892e-01
## 356 Territory 5.960638e-02 2.643699e-04 9.401293e-01
## 357 Territory 4.369902e-02 5.189360e-05 9.562491e-01
## 358 Territory 6.147696e-02 1.287314e-04 9.383943e-01
## 359 Intermediate 6.238080e-01 6.933543e-02 3.068566e-01
## 360 Territory 4.492871e-02 4.469152e-04 9.546244e-01
## 361 Territory 1.397782e-02 1.455397e-05 9.860076e-01
## 362 Territory 3.595670e-01 1.672326e-02 6.237098e-01
## 363 Territory 2.539488e-02 3.921985e-05 9.745659e-01
## 364 Territory 2.064653e-02 1.308881e-04 9.792226e-01
## 365 Territory 3.942628e-02 2.499444e-04 9.603238e-01
## 366 Territory 1.996242e-01 3.102390e-03 7.972734e-01
## 367 Territory 1.636563e-02 1.889055e-04 9.834455e-01
## 368 Territory 7.513809e-02 1.023408e-03 9.238385e-01
## 369 Territory 5.180903e-04 1.186597e-08 9.994819e-01
## 370 Territory 2.486540e-01 8.063161e-03 7.432829e-01
## 371 Territory 9.907531e-02 9.613213e-04 8.999634e-01
## 372 Territory 4.744761e-01 4.538451e-02 4.801394e-01
## 373 Intermediate 7.337704e-01 1.404247e-01 1.258048e-01
## 374 Territory 2.188296e-04 1.864402e-08 9.997812e-01
## 375 Territory 3.536404e-01 5.892920e-03 6.404667e-01
## 376 Territory 1.610468e-01 2.251277e-03 8.367019e-01
## 377 Territory 3.216944e-02 5.447604e-05 9.677761e-01
## 378 Territory 4.476767e-02 3.581327e-04 9.548742e-01
## 379 Territory 1.573349e-01 2.387963e-03 8.402772e-01
## 380 Territory 3.712625e-01 1.275208e-02 6.159854e-01
## 381 Territory 4.518343e-02 3.884810e-04 9.544281e-01
## 382 Territory 1.527280e-02 1.369025e-05 9.847135e-01
## 383 Territory 2.257970e-01 1.575388e-02 7.584491e-01
## 384 Intermediate 6.423334e-01 4.270380e-02 3.149628e-01
## 385 Territory 4.042857e-01 3.029923e-02 5.654151e-01
## 386 Territory 1.527088e-01 1.301477e-03 8.459897e-01
## 387 Territory 3.101813e-02 2.922433e-04 9.686896e-01
## 388 Intermediate 6.578401e-01 2.105794e-02 3.211020e-01
## 389 Territory 1.776687e-01 2.544176e-04 8.220769e-01
## 390 Territory 1.569225e-01 6.918774e-04 8.423856e-01
## 391 Territory 3.805699e-02 4.007133e-05 9.619029e-01
## 392 Territory 2.368111e-02 4.522246e-05 9.762737e-01
## 393 Territory 5.340802e-02 1.046186e-04 9.464874e-01
## 394 Intermediate 5.866720e-01 1.559279e-01 2.574001e-01
## 395 Territory 1.154760e-01 1.190342e-03 8.833337e-01
## 396 Territory 3.652917e-01 9.427411e-02 5.404342e-01
## 397 Intermediate 6.446405e-01 1.052504e-01 2.501091e-01
## 398 Territory 3.726922e-02 2.890844e-05 9.627019e-01
## 399 Territory 5.043191e-02 1.816224e-04 9.493865e-01
## 400 Territory 2.061842e-03 1.921542e-07 9.979380e-01
## 401 Territory 2.663339e-02 4.469702e-05 9.733219e-01
## 402 Territory 1.505738e-01 2.109041e-03 8.473172e-01
## 403 Territory 2.157204e-03 6.985854e-07 9.978421e-01
## 404 Territory 1.293634e-01 3.313342e-03 8.673233e-01
## res_fin.Phenotype
## 1 Intermediate
## 2 Intermediate
## 3 Intermediate
## 4 Intermediate
## 5 Intermediate
## 6 Intermediate
## 7 Intermediate
## 8 Intermediate
## 9 Intermediate
## 10 Intermediate
## 11 Intermediate
## 12 Intermediate
## 13 Intermediate
## 14 Intermediate
## 15 Intermediate
## 16 Intermediate
## 17 Intermediate
## 18 Intermediate
## 19 Intermediate
## 20 Intermediate
## 21 Intermediate
## 22 Intermediate
## 23 Intermediate
## 24 Intermediate
## 25 Intermediate
## 26 Intermediate
## 27 Intermediate
## 28 Intermediate
## 29 Intermediate
## 30 Intermediate
## 31 Intermediate
## 32 Intermediate
## 33 Intermediate
## 34 Intermediate
## 35 Intermediate
## 36 Intermediate
## 37 Intermediate
## 38 Intermediate
## 39 Intermediate
## 40 Intermediate
## 41 Intermediate
## 42 Intermediate
## 43 Intermediate
## 44 Intermediate
## 45 Intermediate
## 46 Intermediate
## 47 Intermediate
## 48 Intermediate
## 49 Intermediate
## 50 Intermediate
## 51 Intermediate
## 52 Intermediate
## 53 Intermediate
## 54 Intermediate
## 55 Intermediate
## 56 Intermediate
## 57 Intermediate
## 58 Intermediate
## 59 Intermediate
## 60 Intermediate
## 61 Intermediate
## 62 Intermediate
## 63 Intermediate
## 64 Intermediate
## 65 Intermediate
## 66 Intermediate
## 67 Intermediate
## 68 Intermediate
## 69 Intermediate
## 70 Intermediate
## 71 Intermediate
## 72 Intermediate
## 73 Intermediate
## 74 Intermediate
## 75 Intermediate
## 76 Intermediate
## 77 Intermediate
## 78 Intermediate
## 79 Intermediate
## 80 Intermediate
## 81 Intermediate
## 82 Intermediate
## 83 Intermediate
## 84 Intermediate
## 85 Intermediate
## 86 Intermediate
## 87 Intermediate
## 88 Intermediate
## 89 Intermediate
## 90 Intermediate
## 91 Intermediate
## 92 Intermediate
## 93 Intermediate
## 94 Intermediate
## 95 Intermediate
## 96 Intermediate
## 97 Intermediate
## 98 Intermediate
## 99 Intermediate
## 100 Intermediate
## 101 Intermediate
## 102 Intermediate
## 103 Intermediate
## 104 Intermediate
## 105 Intermediate
## 106 Intermediate
## 107 Intermediate
## 108 Intermediate
## 109 Intermediate
## 110 Intermediate
## 111 Intermediate
## 112 Intermediate
## 113 Intermediate
## 114 Intermediate
## 115 Intermediate
## 116 Intermediate
## 117 Intermediate
## 118 Intermediate
## 119 Intermediate
## 120 Intermediate
## 121 Intermediate
## 122 Intermediate
## 123 Intermediate
## 124 Intermediate
## 125 Intermediate
## 126 Intermediate
## 127 Intermediate
## 128 Intermediate
## 129 Intermediate
## 130 Intermediate
## 131 Intermediate
## 132 Intermediate
## 133 Intermediate
## 134 Intermediate
## 135 Speckles
## 136 Speckles
## 137 Speckles
## 138 Speckles
## 139 Speckles
## 140 Speckles
## 141 Speckles
## 142 Speckles
## 143 Speckles
## 144 Speckles
## 145 Speckles
## 146 Speckles
## 147 Speckles
## 148 Speckles
## 149 Speckles
## 150 Speckles
## 151 Speckles
## 152 Speckles
## 153 Speckles
## 154 Speckles
## 155 Speckles
## 156 Speckles
## 157 Speckles
## 158 Speckles
## 159 Speckles
## 160 Speckles
## 161 Speckles
## 162 Speckles
## 163 Speckles
## 164 Speckles
## 165 Speckles
## 166 Speckles
## 167 Speckles
## 168 Speckles
## 169 Speckles
## 170 Speckles
## 171 Speckles
## 172 Speckles
## 173 Speckles
## 174 Speckles
## 175 Speckles
## 176 Speckles
## 177 Speckles
## 178 Speckles
## 179 Speckles
## 180 Speckles
## 181 Speckles
## 182 Speckles
## 183 Speckles
## 184 Speckles
## 185 Speckles
## 186 Speckles
## 187 Speckles
## 188 Speckles
## 189 Speckles
## 190 Speckles
## 191 Speckles
## 192 Speckles
## 193 Speckles
## 194 Speckles
## 195 Speckles
## 196 Speckles
## 197 Speckles
## 198 Speckles
## 199 Speckles
## 200 Speckles
## 201 Speckles
## 202 Speckles
## 203 Speckles
## 204 Speckles
## 205 Speckles
## 206 Speckles
## 207 Speckles
## 208 Speckles
## 209 Speckles
## 210 Speckles
## 211 Speckles
## 212 Speckles
## 213 Speckles
## 214 Speckles
## 215 Speckles
## 216 Speckles
## 217 Speckles
## 218 Speckles
## 219 Speckles
## 220 Speckles
## 221 Speckles
## 222 Speckles
## 223 Speckles
## 224 Speckles
## 225 Speckles
## 226 Speckles
## 227 Speckles
## 228 Speckles
## 229 Speckles
## 230 Speckles
## 231 Speckles
## 232 Speckles
## 233 Speckles
## 234 Speckles
## 235 Speckles
## 236 Speckles
## 237 Speckles
## 238 Speckles
## 239 Speckles
## 240 Speckles
## 241 Speckles
## 242 Speckles
## 243 Speckles
## 244 Speckles
## 245 Speckles
## 246 Speckles
## 247 Speckles
## 248 Speckles
## 249 Speckles
## 250 Speckles
## 251 Speckles
## 252 Speckles
## 253 Speckles
## 254 Speckles
## 255 Speckles
## 256 Speckles
## 257 Speckles
## 258 Speckles
## 259 Speckles
## 260 Speckles
## 261 Speckles
## 262 Speckles
## 263 Speckles
## 264 Speckles
## 265 Speckles
## 266 Speckles
## 267 Speckles
## 268 Speckles
## 269 Speckles
## 270 Territory
## 271 Territory
## 272 Territory
## 273 Territory
## 274 Territory
## 275 Territory
## 276 Territory
## 277 Territory
## 278 Territory
## 279 Territory
## 280 Territory
## 281 Territory
## 282 Territory
## 283 Territory
## 284 Territory
## 285 Territory
## 286 Territory
## 287 Territory
## 288 Territory
## 289 Territory
## 290 Territory
## 291 Territory
## 292 Territory
## 293 Territory
## 294 Territory
## 295 Territory
## 296 Territory
## 297 Territory
## 298 Territory
## 299 Territory
## 300 Territory
## 301 Territory
## 302 Territory
## 303 Territory
## 304 Territory
## 305 Territory
## 306 Territory
## 307 Territory
## 308 Territory
## 309 Territory
## 310 Territory
## 311 Territory
## 312 Territory
## 313 Territory
## 314 Territory
## 315 Territory
## 316 Territory
## 317 Territory
## 318 Territory
## 319 Territory
## 320 Territory
## 321 Territory
## 322 Territory
## 323 Territory
## 324 Territory
## 325 Territory
## 326 Territory
## 327 Territory
## 328 Territory
## 329 Territory
## 330 Territory
## 331 Territory
## 332 Territory
## 333 Territory
## 334 Territory
## 335 Territory
## 336 Territory
## 337 Territory
## 338 Territory
## 339 Territory
## 340 Territory
## 341 Territory
## 342 Territory
## 343 Territory
## 344 Territory
## 345 Territory
## 346 Territory
## 347 Territory
## 348 Territory
## 349 Territory
## 350 Territory
## 351 Territory
## 352 Territory
## 353 Territory
## 354 Territory
## 355 Territory
## 356 Territory
## 357 Territory
## 358 Territory
## 359 Territory
## 360 Territory
## 361 Territory
## 362 Territory
## 363 Territory
## 364 Territory
## 365 Territory
## 366 Territory
## 367 Territory
## 368 Territory
## 369 Territory
## 370 Territory
## 371 Territory
## 372 Territory
## 373 Territory
## 374 Territory
## 375 Territory
## 376 Territory
## 377 Territory
## 378 Territory
## 379 Territory
## 380 Territory
## 381 Territory
## 382 Territory
## 383 Territory
## 384 Territory
## 385 Territory
## 386 Territory
## 387 Territory
## 388 Territory
## 389 Territory
## 390 Territory
## 391 Territory
## 392 Territory
## 393 Territory
## 394 Territory
## 395 Territory
## 396 Territory
## 397 Territory
## 398 Territory
## 399 Territory
## 400 Territory
## 401 Territory
## 402 Territory
## 403 Territory
## 404 Territory
saveRDS(df_res,file="Output_ElasticNet.rds")
Final accuracy is 81.5%. Just by quickly glancing at the output dataframe (df_res ; stored locally), intermediates appear to be the most frequently misclassified class. Further, in multiple instances territories or speckles get classifed as intermediates incorrectly. Note: Some of these misclassified cases genuinely appear to have the wrong class label - Revise class labels?
Lets try more sophisticated ML approaches.
Test SVM with Radial Basis Function as kernel without any feature selection.
ToDo: Feature selection methods tests
filt_recipe_svm <- recipe(Phenotype~.,data=res_fin) %>%
step_normalize(all_numeric_predictors()) %>%
step_nzv(all_predictors())
### Lets not eliminate correlated features unlike logistic regression this time. Lets try supervised feature selection methods --
## Unfortunately, tidymodels doesn't have supervised feature selection methods incorporated yet. You need to use caret for approaches like Recursive Feature Elimination https://topepo.github.io/caret/recursive-feature-elimination.html
### Method 1 - Extract features ranked by Variable Importance from our gradient Boosted model using vip package -- NOTE THAT THIS IS JUST AN IDEA AS VIP PACKAGE NEEDS R v4.1 while mine is R v4
#res_model_xgboost_workflow_final %>% fit(res_testing) %>% vip::vi() %>% filter(Importance>0)
### Extracting feature importance from xgboosted trees is also not striaghtforward - lets just use a random forest to check
### Lets try recipeselectors package instead -- Recursive Feature eleimination .ie. backward selection
### define basic random forest model with default parameters for feature selection to use for SVM -- BAD PRACTICE?
res_model_svm <- svm_rbf(cost = tune(),rbf_sigma = tune(),margin = tune()) %>% set_mode("classification") %>% set_engine("kernlab")
### no feature selection ! - Lets leave them in for now - examine variable importance later -- NEED TO UPDATE R AND GET VIP PACKGE !
filt_recipe_svm <- recipe(Phenotype~.,data=res_fin) %>%
step_scale(all_numeric_predictors()) %>%
step_nzv(all_predictors())
res_model_svm_workflow <- workflow() %>% add_recipe(filt_recipe_svm) %>% add_model(res_model_svm)
penalty_grid_svm <- grid_random(cost(),
rbf_sigma(),
svm_margin(),
size=50)
svm_grid<- tune_grid(res_model_svm_workflow,resamples = res_folds,grid=penalty_grid_svm)
autoplot(svm_grid,metric="accuracy") ## ~ 80% accuracy for top models
Best performing models have up to 80% accuracy. This is also lower than previous tests.
best_props_svm <- svm_grid %>% select_best(metric="accuracy")
svm_model_metrics <- finalize_workflow(res_model_svm_workflow,best_props_svm) %>% fit_resamples(resamples=res_folds) %>% collect_metrics()
## Finalize workflow and return final metric on Test -- Close to train values !
res_model_svm_final <- finalize_workflow(res_model_svm_workflow,best_props_svm) %>% last_fit(res_split) ### final fit to train-test split- 89% accuracy
res_model_svm_final %>% collect_metrics()
## # A tibble: 3 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy multiclass 0.753 Preprocessor1_Model1
## 2 roc_auc hand_till 0.912 Preprocessor1_Model1
## 3 brier_class multiclass 0.161 Preprocessor1_Model1
Final accuracy is about 75%. Lets try Gradient Boosted Decision Trees
Note that Xgboost has a lot of tunable parameters, so we will adopt a two stage tuning where we tune the most important parameters first (tree depth, learning rate and number of trees) and then tune the rest later.
### No need to remove correlated variables here ! Tree based models are less sensitive to this
filt_recipe_xgboost <- recipe(Phenotype~.,data=res_fin) %>%
step_scale(all_numeric_predictors()) %>%
step_nzv(all_predictors())
## train recipe - Also optional -Note that when we add recipe object to workflow, you dont need preprocessing steps by prep and bake
filt_prep_xgboost <- filt_recipe_xgboost %>% prep(training=res_training)
#### Define model
### xgboost has a lot of hyperparameters ! -- Can't tune all of them at once (as it will search through higher dim. space) - Lets tune 3 parameters first
### and then tune rest in second stage for best Stage 1
res_model_xgboost_stage1 <- boost_tree(learn_rate = tune(),trees=tune(),tree_depth = tune()) %>% set_mode("classification") %>% set_engine("xgboost")
res_model_xgboost_workflow_stage1 <- workflow() %>% add_recipe(filt_recipe_xgboost) %>% add_model(res_model_xgboost_stage1)
## go for random grid values
penalty_grid_stage1 <- grid_random(learn_rate(),
trees(),
tree_depth(),
size=50) ## 50 random options for optimal learning rate search
xgboost_grid_stage1 <- tune_grid(res_model_xgboost_workflow_stage1,resamples = res_folds,grid=penalty_grid_stage1)
saveRDS(xgboost_grid_stage1,file="xgboost_grid_stage1.rds")
autoplot(xgboost_grid_stage1,metric="accuracy") ## can see that best model tops 90% accuracy
best_props_stage1 <- xgboost_grid_stage1 %>% select_best(metric="accuracy")
###### Stage 2 of model, optimze rest of parameters with Learning rate held constant - These are slightly less important
res_model_xgboost_stage2 <- res_model_xgboost_stage1 %>% set_args(learn_rate=best_props_stage1$learn_rate,
tree_depth=best_props_stage1$tree_depth,
trees=best_props_stage1$trees,
loss_reduction=tune(),
stop_iter=tune(),
min_n=tune())
res_model_xgboost_workflow_stage2 <- res_model_xgboost_workflow_stage1 %>% update_model(res_model_xgboost_stage2)
## 2nd stage tune_grid
penalty_grid_stage2 <- grid_random(loss_reduction(),
stop_iter(),
min_n(),
size=30)
xgboost_grid_stage2 <- tune_grid(res_model_xgboost_workflow_stage2,resamples = res_folds,grid=penalty_grid_stage2)
autoplot(xgboost_grid_stage2,metric="accuracy") ## ~ 91% accuracy for top models
best_props_stage2 <- xgboost_grid_stage2 %>% select_best(metric="accuracy")
###### Stage 2 of model, optimze rest of parameters with Learning rate held constant - These are slightly less important
res_model_xgboost_final <- res_model_xgboost_stage2 %>% set_args(learn_rate=best_props_stage1$learn_rate,
tree_depth=best_props_stage1$tree_depth,
trees=best_props_stage1$trees,
loss_reduction=best_props_stage2$loss_reduction,
stop_iter=best_props_stage2$stop_iter,
min_n=best_props_stage2$min_n)
res_model_xgboost_workflow_final <- res_model_xgboost_workflow_stage2 %>% update_model(res_model_xgboost_final)
#### Last fit !
res_model_xgboost_lastfit <- res_model_xgboost_workflow_final %>% last_fit(res_split)
res_model_xgboost_lastfit %>% collect_metrics()
## # A tibble: 3 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy multiclass 0.728 Preprocessor1_Model1
## 2 roc_auc hand_till 0.875 Preprocessor1_Model1
## 3 brier_class multiclass 0.213 Preprocessor1_Model1
Using XGBoost doesn’t signficantly improve prediction accuracy with this dataset. This is unlike previous attempt where we achieved upto 95% accuracy.
In general, XGboost > SVM > logistic regression with regularization ! -
It’s also possible to use Convolutional Neural Networks as shown below, however we have insufficient images for training properly (final accuracies are in the order of 60%). Explore this in the future !
ToDo: TensorFlow on R is not as up to date as Python. Had to revert to deprecated functions (image_data_generator()). Explore training on non-augmented data as well as other CNN architectures in Python.
Sys.setenv(RETICULATE_PYTHON = "/Users/ra36doj/.virtualenvs/r-tensorflow/bin/python")
#remotes::install_github("rstudio/tensorflow")
#reticulate::install_python()
#install_tensorflow()
library(tensorflow)
#install.packages("keras")
library(keras)
#install.packages("remotes")
#remotes::install_github(sprintf("rstudio/%s", c("reticulate", "tensorflow", "keras")))
#reticulate::miniconda_uninstall() # start with a blank slate
#reticulate::install_miniconda()
#keras::install_keras()
tensorflow::as_tensor("Hello World")
list_files_TF <- list.files(path = "lowResCrop_TF/",pattern=".png$",full.names = T)
### inspire by https://shirinsplayground.netlify.app/2018/06/keras_fruits/
phenotype_list <- c("Blobs","LowSignal","Territory")
output_n <- length(phenotype_list)
source_dir <- "lowResCrop_TF"
### create directories containing train and validation data with structure https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d
#Main directory
dir.create("NeuralNetwork")
#sub directories for validation and training
dir.create("NeuralNetwork/Train")
dir.create("NeuralNetwork/Test")
## subdirectories for 3 classes within
for (dir in c("Train","Test")){
for(phen in phenotype_list){
dir.create(paste0("NeuralNetwork/",dir,"/",phen))
}
}
### Note that here we arent adjusting for class imbalance -- is not substantial -- territories (highest class) have 25% more images than blobs (lowest)
set.seed(12552)
train_indices <- sample.int(length(list_files_TF),0.75*length(list_files_TF))
### Copy files into suitable directories
i <- 1
for (i in 1:length(list_files_TF)){
filename <- list_files_TF[i]
if (i %in% train_indices){
file.copy(filename,paste0("NeuralNetwork/Train/",gsub("_.*","",gsub("lowResCrop_TF//","",filename))))
} else {
file.copy(filename,paste0("NeuralNetwork/Test/",gsub("_.*","",gsub("lowResCrop_TF//","",filename))))
}
}
##### Note -- Sometimes some hidden files are generated during copy paste, presumably from Fiji -- These throw an error later
##### Delete them before proceeding : Navigate to folder NeuralNetwork folder (containing train and test folders) and execute the below command
##### find . -name ".*" -exec rm -rf {} \;
### Image dimensions - Note that some of our images are not 20x20, can be a bit lower (edge cases) - Will they be handled properly?
img_width <- 20
img_height <- 20
target_size <- c(img_width, img_height)
channels <- 3 #RGB
# path to image folders
train_image_files_path <- "NeuralNetwork/Train/"
valid_image_files_path <- "NeuralNetwork/Test/"
### Augment training data (but not test data!)
train_data_gen = image_data_generator(
rescale = 1/255,
rotation_range = 180,
width_shift_range = 0.2,
height_shift_range = 0.2,
shear_range = 0.2, ## radian value: corresponds to shear angle of max ~10 degrees
#zoom_range = 0.2,
horizontal_flip = TRUE,
vertical_flip=TRUE,
fill_mode = "nearest"
)
# Validation data shouldn't be augmented! But it should also be scaled.
valid_data_gen <- image_data_generator(
rescale = 1/255
)
# training images
train_image_array_gen <- flow_images_from_directory(train_image_files_path,
train_data_gen,
target_size = target_size,
class_mode = "categorical",
classes = phenotype_list,
seed = 42)
# validation images
valid_image_array_gen <- flow_images_from_directory(valid_image_files_path,
valid_data_gen,
target_size = target_size,
class_mode = "categorical",
classes = phenotype_list,
seed = 42)
table(factor(train_image_array_gen$classes)) ## can see some class imbalance ! - lets explore later if you want to tweak here
train_image_array_gen$class_indices ## blobs are classified as 0, LowSig as 1 and territories as 2
#### Define model
# number of training samples
train_samples <- train_image_array_gen$n
# number of validation samples
valid_samples <- valid_image_array_gen$n
# define batch size and number of epochs
batch_size <- 32
epochs <- 10
## Model
# initialise model
#model <- keras_model_sequential()
# add layers
# model %>%
# ### too many output nodes? -- maybe reduce to 16 and next layer to 8?
# layer_conv_2d(filter = 16, kernel_size = c(3,3), padding = "same", input_shape = c(img_width, img_height, channels)) %>%
# layer_activation("relu") %>%
#
# # Second hidden layer
# layer_conv_2d(filter = 8, kernel_size = c(3,3), padding = "same") %>%
# layer_activation_leaky_relu(0.5) %>%
# layer_batch_normalization() %>%
#
# # Use max pooling
# layer_max_pooling_2d(pool_size = c(2,2)) %>%
# layer_dropout(0.25) %>%
#
# # Flatten max filtered output into feature vector
# # and feed into dense layer
# layer_flatten() %>%
# layer_dense(64) %>%
# layer_activation("relu") %>%
# layer_dropout(0.5) %>%
#
# # Outputs from dense layer are projected onto output layer
# layer_dense(output_n) %>%
# layer_activation("softmax")
model <- keras_model_sequential()
model %>%
layer_flatten(input_shape = c(20, 20)) %>%
layer_dense(units = 128, activation = 'relu') %>%
layer_dense(units = 3, activation = 'softmax')
# compile
model %>% compile(
loss = "categorical_crossentropy",
optimizer = optimizer_rmsprop(), ### using default optimizer values ! -- Need to check !
metrics = "accuracy"
)
### fit model
# dir.create("checkpoints")
# dir.create("logs")
hist <- model %>% fit_generator(
# training data
train_image_array_gen,
# epochs
steps_per_epoch = as.integer(train_samples / batch_size),
epochs = epochs,
# validation data
validation_data = valid_image_array_gen,
validation_steps = as.integer(valid_samples / batch_size),
# print progress
verbose = 2,
callbacks = list(
# save best model after every epoch
callback_model_checkpoint("/Users/ra36doj/Desktop/mount/DeepLearning/ImageClassifier/240420_test/checkpoints", save_best_only = TRUE),
# only needed for visualising with TensorBoard
callback_tensorboard(log_dir = "/Users/ra36doj/Desktop/mount/DeepLearning/ImageClassifier/240420_test/logs")
)
)
For now, go with xgboost model to get preliminary predictions for final dataset.
First display images from each genotype (12 total) that we need to classify.
#newData <- list.files(path = "TestImages/",pattern=".tif$",full.names = T)
newData <- list.files(path = "Images_classify/",pattern=".tif$",full.names = T)
## 6 unique categories -
#newData_categories <- unique(gsub("_\\d+_RGB.*\\.tif$","",gsub("TestImages//Default.*_67_","",newData)))
#newData_categories <- unique(gsub("_\\d+_RGB.*\\.tif$","",gsub("Images_classify//dapi_gfp_.*","",newData)))
newData_categories <- unique(gsub("^_","",regmatches(newData, regexpr("_[^_]+_[^_]+_[^_]+(?=_\\d+_\\d+_RGB\\.tif$)", newData, perl=TRUE))))
## Display 6 nuclei from each cateogory
image_raw <- lapply(list_files, function(i) readTIFF(i,as.is = T)[,,2])
i <- 1
j <- 1
set.seed(12554)
par(mfrow=c(2,3)) ## adjust margins based on number of images
for(i in 1:length(newData_categories)){
my_category_images <- newData[grep(newData_categories[i],newData)]
my_category_images_sub <- my_category_images[sample.int(length(my_category_images),6)]
for (j in 1:6){
image(readTIFF(my_category_images_sub[j],as.is=T)[,,2],col=gray.colors(256), main=paste(newData_categories[i],"_",j),asp=1)
axis(1,0:1,0:1)
}
}
Calculate features and predict.
res_method_newData <- as.data.frame(matrix(nrow=length(newData),ncol=46))
i <- 1
for (i in 1:length(newData)){
y <- readImage(newData[i])[,,2]
#EBImage::display(y)
#hist(as.vector(imageData(y))) #### for visualization purposes in test cases -- Our background is around 0.15-0.2 scaled intensity
#### Compute Haralick features for entire image
x <- Image(1,dim=dim(y))
res_method_newData[i,1:26] <- colMeans(computeFeatures.haralick(x, y))
### Add mean intensity of image
res_method_newData[i,27] <- mean(imageData(y))
### Additional object dependent features
### explore thresholding for object features
## Local background based thresholding to generate binary image
y_2 <- gblur(y,sigma=0.5) ### mild blurring - sigma must be selected appropriately !
#EBImage::display(y_2,all=T)
cutoff <- 0.3 ## background cutoff selected based on histogram
x <- y_2 > cutoff
#EBImage::display(x,all=T)
#if no objects detected, then object feature is 0
if(sum(imageData(x))==0){
res_method_newData[i,28:46] <- 0
} else {
res_method_newData[i,28:46] <- c(computeFeatures.moment(x,y), computeFeatures.shape(x), computeFeatures.basic(x,y))
}
rownames(res_method_newData)[i] <- gsub("_RGB.tif","",gsub("Images_classify//","",newData[i]))
}
df_res_newData <- data.frame(res_model_xgboost_lastfit %>% extract_workflow() %>% predict(res_method_newData),res_model_xgboost_lastfit %>% extract_workflow() %>% predict(res_method_newData,type="prob"),rownames(res_method_newData))
###### Predictions look good for single cells, but a fraction of images have >1 nuclei in field of view - This throws off predictions more in
##### favour of territories as its localized in some sense
df_res_newData <- df_res_newData %>% mutate(Genotype=case_when(grepl(newData_categories[1],rownames.res_method_newData.)~newData_categories[1],
grepl(newData_categories[2],rownames.res_method_newData.)~newData_categories[2],
grepl(newData_categories[3],rownames.res_method_newData.)~newData_categories[3],
grepl(newData_categories[4],rownames.res_method_newData.)~newData_categories[4],
grepl(newData_categories[5],rownames.res_method_newData.)~newData_categories[5],
grepl(newData_categories[6],rownames.res_method_newData.)~newData_categories[6],
grepl(newData_categories[7],rownames.res_method_newData.)~newData_categories[7],
grepl(newData_categories[8],rownames.res_method_newData.)~newData_categories[8],
grepl(newData_categories[9],rownames.res_method_newData.)~newData_categories[9],
grepl(newData_categories[10],rownames.res_method_newData.)~newData_categories[10],
grepl(newData_categories[11],rownames.res_method_newData.)~newData_categories[11],
grepl(newData_categories[12],rownames.res_method_newData.)~newData_categories[12]))
saveRDS(df_res_newData,"df_res_newData.rds")
head(df_res_newData)
## .pred_class .pred_Intermediate .pred_Speckles .pred_Territory
## 1 Territory 0.001845192 0.0003469431 9.978079e-01
## 2 Speckles 0.011749393 0.9881930947 5.749628e-05
## 3 Intermediate 0.939707696 0.0530534349 7.238836e-03
## 4 Speckles 0.005026811 0.9822224379 1.275078e-02
## 5 Intermediate 0.985535383 0.0082727019 6.191989e-03
## 6 Speckles 0.055980500 0.9434951544 5.243666e-04
## rownames.res_method_newData. Genotype
## 1 dapi_gfp_0001-0018_rox2ko17_gfp_rox2_1_0 rox2ko17_gfp_rox2
## 2 dapi_gfp_0001-0021_rox2ko112_gfp_A3B0_1_18 rox2ko112_gfp_A3B0
## 3 dapi_gfp_0001-0021_rox2ko112_gfp_rox1d3f_1_20 rox2ko112_gfp_rox1d3f
## 4 dapi_gfp_0001-0022_rox2ko17_gfp_minirox_1_2 rox2ko17_gfp_minirox
## 5 dapi_gfp_0001-0023_rox2ko112_gfp_A3B0_2_19 rox2ko112_gfp_A3B0
## 6 dapi_gfp_0001-0023_rox2ko112_gfp_rox2_1_12 rox2ko112_gfp_rox2
df_res_newData %>% select(Genotype,.pred_class) %>% group_by(Genotype) %>% count(.pred_class) %>% mutate(Fraction=n/sum(n)) %>%
ggplot(aes(x=factor(Genotype),y=Fraction,fill=.pred_class)) + geom_col(width=0.75) + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
While the accuracy can be improved in the future, preliminary results already give us an indication of trends. Clearly full length rox2 generates the most territories. A0B0 and rox1d3c produces a mix of intermediates and territories while the rest are significantly worse at territory formation (.ie. lead to speckled phenotype)
Inspecting emission probabilities of example images gives some ideas on how to improve. In summary, to improve: 1) Improved class labels for training data -> Some images geniunely appear to have wrong labels 2) Improved image resolution -> In previous test, images were acquired at twice the resolution compared to this time. Often, not enough pixels cover speckles. Territories appear grainy as well. 3) Better preprocessing pipeline -> In multiple cases, we have poor crops where only a part of the nuclei is in the image. Some images have multiple cells. Need to eliminate these images in preprocess pipeline